Marchantiales

Setup

Installing and loading our package

#devtools::install_github("https://github.com/ipb-halle/iESTIMATE")
library(iESTIMATE)

Overview of abstraction levels

To get an overview of the abstraction levels and the chemical diversity, sunburst plots can be drawn.

Sunburst plot of CHEMONT classes

# Sunburst plot for the CHEMONT classes: pass the row names of div_classes
# together with its frequency column.
sunBurstPlotFromSubstanceClasses(
  rownames(marchantiales$div_classes),
  marchantiales$div_classes$frequency,
  colorStart = 0.0,
  colorAlpha = 0.6
)

Sunburst plot of Natural Product Classifier classes

# Sunburst plot for the Natural Product Classifier classes: pass the row
# names of div_npclasses together with its frequency column.
sunBurstPlotFromSubstanceClasses(
  rownames(marchantiales$div_npclasses),
  marchantiales$div_npclasses$frequency,
  colorStart = 0.0,
  colorAlpha = 0.6
)

Estimating EMVs at different levels of abstraction

To speed up Random Forest analyses, the use of parallel processing is recommended.

library(parallel)
library(e1071)
library(doMC)
#> Loading required package: foreach
#> Loading required package: iterators
# Number of physical CPU cores to hand to the doMC backend.
# detectCores() returns NA when the core count cannot be determined, so fall
# back to a single worker instead of passing NA on to registerDoMC().
nSlaves <- detectCores(all.tests = FALSE, logical = FALSE)
if (is.na(nSlaves) || nSlaves < 1) {
  nSlaves <- 1L
}
registerDoMC(nSlaves)

Estimating EMVs at the level of molecules

First, we demonstrate the use of PLS-DA to select essential variables.

# PLS-DA feature selection on the molecule-level matrix, grouped by species.
# The number of components is the number of species levels minus one.
# Warnings raised during the fit are suppressed.
sel_pls_comp_list <- suppressWarnings(
  select_features_pls(
    feat_matrix = marchantiales$comp_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    components = nlevels(as.factor(marchantiales$metadata$species)) - 1
  )
)
#> [1] "Number of chosen components: 13"

Print selected variables and model metrics.

# Report how many distinct variables the PLS-DA selection returned, then list
# the selected variables themselves.
print(paste(
  "Number of essential variables:",
  length(unique(unlist(sel_pls_comp_list[["_selected_variables_"]])))
))
#> [1] "Number of essential variables: 791"
print(sel_pls_comp_list[["_selected_variables_"]])
#>   [1] "FT00040_neg" "FT00076_neg" "FT00094_neg" "FT00104_neg" "FT00340_neg"
#>   [6] "FT00343_neg" "FT00352_neg" "FT00357_neg" "FT00508_neg" "FT00797_neg"
#>  [11] "FT00968_neg" "FT01114_neg" "FT01116_neg" "FT01551_neg" "FT01576_neg"
#>  [16] "FT01577_neg" "FT01949_neg" "FT02066_neg" "FT02233_neg" "FT02249_neg"
#>  [21] "FT02335_neg" "FT02967_neg" "FT03304_neg" "FT03360_neg" "FT03364_neg"
#>  [26] "FT03392_neg" "FT03416_neg" "FT03460_neg" "FT03571_neg" "FT03572_neg"
#>  [31] "FT03665_neg" "FT03698_neg" "FT03756_pos" "FT03861_neg" "FT03900_neg"
#>  [36] "FT04046_neg" "FT04572_neg" "FT04869_neg" "FT04870_neg" "FT05113_neg"
#>  [41] "FT05196_neg" "FT05261_neg" "FT05511_neg" "FT05775_neg" "FT05894_neg"
#>  [46] "FT05942_neg" "FT05954_neg" "FT06034_neg" "FT06531_neg" "FT06746_neg"
#>  [51] "FT06981_neg" "FT07388_neg" "FT07390_neg" "FT07498_neg" "FT07566_neg"
#>  [56] "FT07678_neg" "FT07721_neg" "FT07783_neg" "FT08088_neg" "FT08118_neg"
#>  [61] "FT08177_neg" "FT08221_neg" "FT08329_neg" "FT08334_neg" "FT08629_neg"
#>  [66] "FT08637_neg" "FT08752_neg" "FT08929_neg" "FT08996_neg" "FT09169_neg"
#>  [71] "FT09386_neg" "FT09522_neg" "FT09736_neg" "FT10018_neg" "FT10317_neg"
#>  [76] "FT10605_neg" "FT10619_neg" "FT10808_neg" "FT10810_neg" "FT11038_neg"
#>  [81] "FT11649_neg" "FT11741_neg" "FT11848_neg" "FT11934_neg" "FT11990_neg"
#>  [86] "FT12247_neg" "FT12249_neg" "FT12589_neg" "FT12590_neg" "FT12936_neg"
#>  [91] "FT13008_neg" "FT13210_neg" "FT13346_neg" "FT13484_pos" "FT13762_neg"
#>  [96] "FT13794_neg" "FT14654_neg" "FT15023_pos" "FT15325_neg" "FT15342_neg"
#> [101] "FT15608_neg" "FT15639_neg" "FT15657_neg" "FT16328_neg" "FT16528_neg"
#> [106] "FT16621_neg" "FT16722_neg" "FT00036_neg" "FT00071_neg" "FT00079_neg"
#> [111] "FT00118_neg" "FT00254_pos" "FT00670_neg" "FT01022_neg" "FT01246_neg"
#> [116] "FT01465_pos" "FT01801_pos" "FT01851_pos" "FT01879_pos" "FT02193_neg"
#> [121] "FT02672_neg" "FT03569_neg" "FT03704_neg" "FT03930_neg" "FT04015_neg"
#> [126] "FT04018_neg" "FT04181_neg" "FT04467_neg" "FT04468_neg" "FT04830_pos"
#> [131] "FT05099_pos" "FT05214_pos" "FT05288_neg" "FT05559_neg" "FT05811_neg"
#> [136] "FT06019_neg" "FT06022_neg" "FT06026_pos" "FT06201_neg" "FT06225_neg"
#> [141] "FT06228_neg" "FT06244_neg" "FT06441_neg" "FT06443_neg" "FT06486_neg"
#> [146] "FT06582_neg" "FT06584_neg" "FT06588_neg" "FT06738_neg" "FT06818_neg"
#> [151] "FT06923_pos" "FT07004_neg" "FT07069_neg" "FT07148_pos" "FT07249_pos"
#> [156] "FT07316_neg" "FT07337_pos" "FT07453_neg" "FT07617_neg" "FT07838_neg"
#> [161] "FT07888_neg" "FT07911_neg" "FT07945_neg" "FT08038_pos" "FT08156_pos"
#> [166] "FT08345_neg" "FT08410_pos" "FT08624_pos" "FT08756_neg" "FT08811_neg"
#> [171] "FT08812_neg" "FT08902_pos" "FT09165_neg" "FT09226_neg" "FT09260_neg"
#> [176] "FT09606_pos" "FT09608_pos" "FT09616_neg" "FT09684_neg" "FT09797_neg"
#> [181] "FT09839_neg" "FT09928_neg" "FT10161_neg" "FT10193_neg" "FT10752_pos"
#> [186] "FT11092_neg" "FT11352_pos" "FT11378_pos" "FT11408_neg" "FT11567_pos"
#> [191] "FT11869_pos" "FT11886_neg" "FT11887_neg" "FT11982_pos" "FT12072_pos"
#> [196] "FT12073_neg" "FT12098_pos" "FT12240_neg" "FT12242_neg" "FT12312_neg"
#> [201] "FT12685_neg" "FT12692_neg" "FT12700_neg" "FT12938_neg" "FT12992_neg"
#> [206] "FT13156_pos" "FT13236_neg" "FT13237_neg" "FT13238_neg" "FT13370_neg"
#> [211] "FT13539_neg" "FT13632_neg" "FT13657_pos" "FT14309_pos" "FT14320_pos"
#> [216] "FT14732_neg" "FT14734_neg" "FT14880_neg" "FT15057_neg" "FT15058_neg"
#> [221] "FT15086_pos" "FT15182_neg" "FT15288_neg" "FT15324_neg" "FT15326_neg"
#> [226] "FT15360_neg" "FT15385_neg" "FT15394_neg" "FT15443_neg" "FT15520_neg"
#> [231] "FT15521_neg" "FT15573_neg" "FT15597_neg" "FT15610_neg" "FT15673_neg"
#> [236] "FT15850_neg" "FT16204_neg" "FT16697_pos" "FT16737_pos" "FT16923_pos"
#> [241] "FT17152_pos" "FT17339_pos" "FT17715_pos" "FT17718_pos" "FT18046_pos"
#> [246] "FT18453_pos" "FT18534_pos" "FT18580_pos" "FT18754_pos" "FT18987_pos"
#> [251] "FT19729_pos" "FT21677_pos" "FT21964_pos" "FT22731_pos" "FT22732_pos"
#> [256] "FT23261_pos" "FT23265_pos" "FT23348_pos" "FT24203_pos" "FT24267_pos"
#> [261] "FT24686_pos" "FT24723_pos" "FT24985_pos" "FT25031_pos" "FT25477_pos"
#> [266] "FT26652_pos" "FT26885_pos" "FT27015_pos" "FT27144_pos" "FT27297_pos"
#> [271] "FT27427_pos" "FT27447_pos" "FT27547_pos" "FT27549_pos" "FT27557_pos"
#> [276] "FT27559_pos" "FT27591_pos" "FT27592_pos" "FT27835_pos" "FT00077_neg"
#> [281] "FT00185_pos" "FT00672_neg" "FT00920_neg" "FT03422_pos" "FT05919_pos"
#> [286] "FT08573_neg" "FT14511_pos" "FT17038_pos" "FT25023_pos" "FT01447_pos"
#> [291] "FT02328_neg" "FT08488_neg" "FT08838_neg" "FT10192_pos" "FT10492_pos"
#> [296] "FT12520_neg" "FT14406_pos" "FT14631_pos" "FT24023_pos" "FT00034_neg"
#> [301] "FT00086_neg" "FT00166_neg" "FT00350_pos" "FT00854_pos" "FT00870_pos"
#> [306] "FT00961_neg" "FT00972_neg" "FT00974_neg" "FT01109_neg" "FT01110_neg"
#> [311] "FT01241_neg" "FT01339_pos" "FT01442_neg" "FT01797_pos" "FT01821_neg"
#> [316] "FT01877_neg" "FT01896_pos" "FT01912_pos" "FT01927_pos" "FT02242_neg"
#> [321] "FT02374_pos" "FT02578_pos" "FT02583_pos" "FT02619_pos" "FT02637_pos"
#> [326] "FT02866_neg" "FT03287_neg" "FT03548_pos" "FT03655_neg" "FT03670_neg"
#> [331] "FT03690_neg" "FT04171_neg" "FT04540_pos" "FT05020_pos" "FT05083_neg"
#> [336] "FT05334_pos" "FT05446_neg" "FT05603_neg" "FT05642_neg" "FT05936_neg"
#> [341] "FT06252_neg" "FT06858_neg" "FT06905_pos" "FT06911_neg" "FT06952_pos"
#> [346] "FT07057_neg" "FT07069_pos" "FT07681_pos" "FT07729_pos" "FT07736_neg"
#> [351] "FT08107_neg" "FT08262_pos" "FT08271_neg" "FT08318_neg" "FT08616_neg"
#> [356] "FT09120_pos" "FT09668_neg" "FT09891_neg" "FT10059_neg" "FT10311_pos"
#> [361] "FT10528_neg" "FT11084_neg" "FT11175_pos" "FT11273_pos" "FT11696_neg"
#> [366] "FT12235_neg" "FT12685_pos" "FT13266_neg" "FT14402_pos" "FT16214_pos"
#> [371] "FT16284_pos" "FT16436_pos" "FT17785_pos" "FT17902_pos" "FT19884_pos"
#> [376] "FT20066_pos" "FT20454_pos" "FT22113_pos" "FT23166_pos" "FT24807_pos"
#> [381] "FT00038_neg" "FT00210_pos" "FT00356_neg" "FT00987_pos" "FT01066_neg"
#> [386] "FT01301_pos" "FT01461_neg" "FT01472_pos" "FT01474_pos" "FT01803_pos"
#> [391] "FT01829_pos" "FT01876_pos" "FT02111_neg" "FT02112_neg" "FT02171_pos"
#> [396] "FT02768_neg" "FT03114_neg" "FT03336_pos" "FT03497_neg" "FT03623_neg"
#> [401] "FT03701_neg" "FT03877_neg" "FT03879_neg" "FT04114_pos" "FT04399_pos"
#> [406] "FT04535_neg" "FT05746_pos" "FT06598_neg" "FT06731_neg" "FT07440_neg"
#> [411] "FT07503_pos" "FT07633_neg" "FT07887_neg" "FT07934_pos" "FT08076_neg"
#> [416] "FT08225_pos" "FT08574_neg" "FT08994_neg" "FT09217_pos" "FT09599_pos"
#> [421] "FT10159_pos" "FT11090_neg" "FT11576_neg" "FT11852_neg" "FT11885_neg"
#> [426] "FT11890_pos" "FT11938_neg" "FT12241_neg" "FT13069_neg" "FT13457_neg"
#> [431] "FT13609_neg" "FT14306_neg" "FT14580_neg" "FT15625_neg" "FT15641_neg"
#> [436] "FT16103_neg" "FT16206_neg" "FT16295_neg" "FT16675_neg" "FT16898_pos"
#> [441] "FT17035_neg" "FT17035_pos" "FT17730_pos" "FT18379_pos" "FT19596_pos"
#> [446] "FT21511_pos" "FT23650_pos" "FT23838_pos" "FT26263_pos" "FT26559_pos"
#> [451] "FT12348_pos" "FT00096_neg" "FT00290_neg" "FT00350_neg" "FT00486_pos"
#> [456] "FT00502_neg" "FT00639_neg" "FT00671_neg" "FT00675_neg" "FT00778_neg"
#> [461] "FT00866_neg" "FT01198_neg" "FT01310_neg" "FT01479_pos" "FT02164_pos"
#> [466] "FT02288_neg" "FT02388_neg" "FT02414_neg" "FT02619_neg" "FT03580_pos"
#> [471] "FT03752_pos" "FT04178_neg" "FT04220_neg" "FT04576_neg" "FT04926_neg"
#> [476] "FT05404_neg" "FT06202_pos" "FT06291_pos" "FT06310_pos" "FT06597_neg"
#> [481] "FT06665_neg" "FT07131_neg" "FT07141_neg" "FT07555_neg" "FT07572_neg"
#> [486] "FT08191_pos" "FT08907_pos" "FT09440_neg" "FT09627_neg" "FT09642_pos"
#> [491] "FT09688_neg" "FT10430_pos" "FT11495_pos" "FT11932_neg" "FT12246_neg"
#> [496] "FT13078_neg" "FT13479_neg" "FT13507_neg" "FT13875_neg" "FT14509_pos"
#> [501] "FT14753_pos" "FT15554_neg" "FT15628_pos" "FT16056_pos" "FT16701_pos"
#> [506] "FT16926_pos" "FT17010_pos" "FT17150_neg" "FT17414_pos" "FT17435_pos"
#> [511] "FT18538_pos" "FT18722_pos" "FT18903_pos" "FT19120_pos" "FT20413_pos"
#> [516] "FT21425_pos" "FT24480_pos" "FT24559_pos" "FT24655_pos" "FT00213_pos"
#> [521] "FT00270_pos" "FT00548_pos" "FT00869_neg" "FT00956_pos" "FT01138_pos"
#> [526] "FT01548_pos" "FT01875_pos" "FT01966_neg" "FT02339_neg" "FT02594_pos"
#> [531] "FT02826_neg" "FT02841_pos" "FT02918_pos" "FT03070_neg" "FT03334_pos"
#> [536] "FT03344_pos" "FT03410_pos" "FT03455_neg" "FT04411_pos" "FT04506_pos"
#> [541] "FT04792_pos" "FT04882_pos" "FT04983_pos" "FT05104_pos" "FT05128_neg"
#> [546] "FT05289_neg" "FT05383_pos" "FT05399_neg" "FT05949_neg" "FT06025_pos"
#> [551] "FT06029_neg" "FT06362_pos" "FT06420_pos" "FT06513_neg" "FT06698_pos"
#> [556] "FT06924_pos" "FT07150_pos" "FT07253_pos" "FT07256_neg" "FT07588_pos"
#> [561] "FT07593_pos" "FT07706_neg" "FT07785_pos" "FT07883_neg" "FT07982_pos"
#> [566] "FT08074_pos" "FT08252_neg" "FT08306_neg" "FT08398_neg" "FT08901_pos"
#> [571] "FT09060_neg" "FT09104_neg" "FT09574_pos" "FT09638_pos" "FT10034_neg"
#> [576] "FT10243_neg" "FT10634_neg" "FT10834_neg" "FT11405_neg" "FT11699_neg"
#> [581] "FT11709_neg" "FT11765_neg" "FT11767_neg" "FT11933_neg" "FT11936_neg"
#> [586] "FT12018_pos" "FT12046_pos" "FT12071_pos" "FT12134_neg" "FT12140_neg"
#> [591] "FT12239_neg" "FT12243_neg" "FT12251_neg" "FT12374_neg" "FT12490_pos"
#> [596] "FT12510_pos" "FT12775_neg" "FT13161_pos" "FT13207_pos" "FT13271_pos"
#> [601] "FT13409_pos" "FT13898_pos" "FT13903_pos" "FT13943_pos" "FT13944_pos"
#> [606] "FT14037_pos" "FT14667_neg" "FT14938_pos" "FT15037_pos" "FT15146_neg"
#> [611] "FT15188_neg" "FT15317_pos" "FT15331_neg" "FT15374_pos" "FT15541_pos"
#> [616] "FT15586_pos" "FT15607_neg" "FT15773_neg" "FT15773_pos" "FT15825_pos"
#> [621] "FT15932_neg" "FT16335_pos" "FT16343_pos" "FT16441_pos" "FT17115_pos"
#> [626] "FT17417_pos" "FT17470_pos" "FT17568_pos" "FT17670_pos" "FT17968_pos"
#> [631] "FT18324_pos" "FT18454_pos" "FT18488_pos" "FT18512_pos" "FT18702_pos"
#> [636] "FT19318_pos" "FT19516_pos" "FT19671_pos" "FT19738_pos" "FT19811_pos"
#> [641] "FT19864_pos" "FT20124_pos" "FT20125_pos" "FT20342_pos" "FT20394_pos"
#> [646] "FT20645_pos" "FT20764_pos" "FT20809_pos" "FT21027_pos" "FT21335_pos"
#> [651] "FT21504_pos" "FT21591_pos" "FT21612_pos" "FT21883_pos" "FT21965_pos"
#> [656] "FT21966_pos" "FT21967_pos" "FT22159_pos" "FT22384_pos" "FT22403_pos"
#> [661] "FT22480_pos" "FT22483_pos" "FT22552_pos" "FT23719_pos" "FT23733_pos"
#> [666] "FT23747_pos" "FT23790_pos" "FT23834_pos" "FT24098_pos" "FT24993_pos"
#> [671] "FT25024_pos" "FT25220_pos" "FT26753_pos" "FT27424_pos" "FT27430_pos"
#> [676] "FT00441_neg" "FT00489_pos" "FT01447_neg" "FT02249_pos" "FT02312_pos"
#> [681] "FT02374_neg" "FT02884_neg" "FT03356_neg" "FT04465_neg" "FT05303_pos"
#> [686] "FT05450_neg" "FT05777_neg" "FT05803_neg" "FT05924_pos" "FT06739_neg"
#> [691] "FT06817_neg" "FT07636_neg" "FT07859_neg" "FT08416_neg" "FT18377_pos"
#> [696] "FT21867_pos" "FT21942_pos" "FT00039_neg" "FT01749_neg" "FT01963_neg"
#> [701] "FT03858_neg" "FT03980_pos" "FT05951_neg" "FT06027_neg" "FT06084_neg"
#> [706] "FT06379_neg" "FT06742_neg" "FT07180_neg" "FT08483_neg" "FT09010_pos"
#> [711] "FT10583_pos" "FT11404_neg" "FT13384_neg" "FT15143_neg" "FT15145_neg"
#> [716] "FT15330_neg" "FT26742_pos" "FT00093_neg" "FT00101_neg" "FT01374_neg"
#> [721] "FT02473_pos" "FT04872_pos" "FT07890_pos" "FT10341_pos" "FT15030_pos"
#> [726] "FT22883_pos" "FT00098_neg" "FT00440_neg" "FT00511_neg" "FT00806_neg"
#> [731] "FT01228_neg" "FT01230_neg" "FT01591_neg" "FT01965_neg" "FT02022_neg"
#> [736] "FT02290_neg" "FT02332_neg" "FT02376_neg" "FT02910_neg" "FT02994_neg"
#> [741] "FT03074_neg" "FT03142_neg" "FT03188_neg" "FT03824_neg" "FT03931_neg"
#> [746] "FT04179_neg" "FT04182_neg" "FT04800_neg" "FT04823_neg" "FT04925_neg"
#> [751] "FT05087_neg" "FT05262_neg" "FT05367_neg" "FT05452_neg" "FT05716_neg"
#> [756] "FT05780_neg" "FT06203_neg" "FT06282_neg" "FT06559_neg" "FT06733_neg"
#> [761] "FT07096_neg" "FT07218_neg" "FT07885_neg" "FT08417_pos" "FT08759_neg"
#> [766] "FT08761_neg" "FT09926_neg" "FT11302_neg" "FT11303_neg" "FT11354_neg"
#> [771] "FT11449_neg" "FT11648_neg" "FT12043_neg" "FT12939_pos" "FT13217_neg"
#> [776] "FT13420_neg" "FT13612_pos" "FT14178_neg" "FT14455_neg" "FT14565_neg"
#> [781] "FT14653_neg" "FT14701_neg" "FT14785_neg" "FT15165_neg" "FT15893_neg"
#> [786] "FT15980_neg" "FT16889_neg" "FT18536_pos" "FT19438_pos" "FT20023_pos"
#> [791] "FT24513_pos"
# Show the multiclass metrics stored with the PLS-DA selection result.
print(sel_pls_comp_list[["_multiclass_metrics_"]])
#> $accuracy
#> [1] 1
#> 
#> $balanced.accuracy
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $DOR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>             Inf             Inf             Inf             Inf             Inf 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>             Inf             Inf             Inf             Inf             Inf 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>             Inf             Inf             Inf             Inf 
#> 
#> $error.rate
#> [1] 0
#> 
#> $F0.5
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $F1
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $F2
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $FDR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $FNR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $FOR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $FPR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $geometric.mean
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $Jaccard
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $L
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>             Inf             Inf             Inf             Inf             Inf 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>             Inf             Inf             Inf             Inf             Inf 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>             Inf             Inf             Inf             Inf 
#> 
#> $lambda
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $MCC
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $MK
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $NPV
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $OP
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $precision
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $recall
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $specificity
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $Youden
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1
# Show the R-squared value stored with the PLS-DA selection result.
print(sel_pls_comp_list[["_model_r2_"]])
#> Rsquared 
#>        1

Draw heatmap.

# Heatmap of the variables selected by PLS-DA; filename = NULL is passed, so
# no output file name is given.
# NOTE(review): sample_colors uses marchantiales$colors, whereas the selection
# calls use marchantiales$metadata$color -- confirm this is intended.
heatmap.selected_features(
  feat_list = marchantiales$comp_list,
  sel_feat = sel_pls_comp_list[["_selected_variables_"]],
  sample_colors = marchantiales$colors,
  plot_width = 10,
  plot_height = 10,
  filename = NULL,
  main = "PLS-DA"
)

Now, use Random Forest (RF) to select essential variables. RF will also be used for the other abstraction levels.

# Random Forest feature selection on the molecule-level matrix, grouped by
# species. Warnings raised during the fit are suppressed.
sel_rf_comp_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$comp_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)

Print selected variables and model metrics.

# Report how many distinct variables the Random Forest selection returned,
# then list the selected variables themselves.
print(paste(
  "Number of essential variables:",
  length(unique(unlist(sel_rf_comp_list[["_selected_variables_"]])))
))
#> [1] "Number of essential variables: 178"
print(sel_rf_comp_list[["_selected_variables_"]])
#>   [1] "FT00731_neg" "FT00949_neg" "FT01576_neg" "FT01637_neg" "FT02335_neg"
#>   [6] "FT03195_neg" "FT05134_neg" "FT05511_neg" "FT05882_neg" "FT08334_neg"
#>  [11] "FT11246_neg" "FT11756_neg" "FT14063_neg" "FT00065_neg" "FT00767_neg"
#>  [16] "FT01246_neg" "FT01317_pos" "FT02412_neg" "FT02482_neg" "FT03182_neg"
#>  [21] "FT04167_neg" "FT04335_pos" "FT04839_pos" "FT05764_pos" "FT06355_neg"
#>  [26] "FT06923_pos" "FT07069_neg" "FT07227_pos" "FT07307_neg" "FT07448_neg"
#>  [31] "FT07947_pos" "FT09074_neg" "FT09260_neg" "FT10307_pos" "FT12126_pos"
#>  [36] "FT12495_pos" "FT12737_neg" "FT13156_pos" "FT13236_neg" "FT13611_neg"
#>  [41] "FT15597_neg" "FT17340_pos" "FT18974_pos" "FT20130_pos" "FT20623_pos"
#>  [46] "FT21504_pos" "FT22558_pos" "FT23905_pos" "FT25471_pos" "FT27295_pos"
#>  [51] "FT27591_pos" "FT28080_pos" "FT28171_pos" "FT00205_pos" "FT00319_neg"
#>  [56] "FT00769_neg" "FT00770_neg" "FT01604_neg" "FT04416_pos" "FT07710_pos"
#>  [61] "FT08793_pos" "FT10029_pos" "FT19424_pos" "FT00676_neg" "FT02047_neg"
#>  [66] "FT02324_pos" "FT03122_neg" "FT04692_pos" "FT04787_neg" "FT07551_neg"
#>  [71] "FT08787_neg" "FT11859_neg" "FT00034_neg" "FT01821_neg" "FT02374_pos"
#>  [76] "FT02716_pos" "FT02866_neg" "FT04637_neg" "FT05773_pos" "FT06905_pos"
#>  [81] "FT06952_pos" "FT22113_pos" "FT00821_neg" "FT01066_neg" "FT01552_neg"
#>  [86] "FT02321_neg" "FT04424_pos" "FT06181_neg" "FT06723_neg" "FT06940_pos"
#>  [91] "FT07221_neg" "FT08051_neg" "FT08865_pos" "FT09155_neg" "FT09623_pos"
#>  [96] "FT09713_neg" "FT09740_pos" "FT11003_neg" "FT13232_pos" "FT13485_pos"
#> [101] "FT13851_pos" "FT14580_neg" "FT15625_neg" "FT15847_neg" "FT18419_pos"
#> [106] "FT18499_pos" "FT19431_pos" "FT20331_pos" "FT20555_pos" "FT00185_pos"
#> [111] "FT01068_pos" "FT01124_pos" "FT01221_pos" "FT05995_pos" "FT12976_pos"
#> [116] "FT16905_pos" "FT17246_neg" "FT00708_pos" "FT03418_pos" "FT04480_pos"
#> [121] "FT05010_pos" "FT08981_neg" "FT12265_pos" "FT14768_pos" "FT16221_pos"
#> [126] "FT00434_pos" "FT00758_neg" "FT01473_neg" "FT02753_neg" "FT02965_neg"
#> [131] "FT05058_pos" "FT09440_neg" "FT16219_neg" "FT00396_pos" "FT04122_pos"
#> [136] "FT04444_pos" "FT06669_pos" "FT06891_pos" "FT06924_pos" "FT08517_neg"
#> [141] "FT11405_neg" "FT12746_pos" "FT29035_pos" "FT04127_pos" "FT05405_pos"
#> [146] "FT05761_pos" "FT08059_pos" "FT14344_pos" "FT17028_pos" "FT18800_pos"
#> [151] "FT21942_pos" "FT22236_pos" "FT03559_neg" "FT03858_neg" "FT05760_pos"
#> [156] "FT06027_neg" "FT06707_neg" "FT08485_pos" "FT17345_pos" "FT17937_pos"
#> [161] "FT22743_pos" "FT00037_neg" "FT00995_pos" "FT02849_pos" "FT03698_pos"
#> [166] "FT04499_pos" "FT04805_pos" "FT14841_pos" "FT22131_pos" "FT00701_pos"
#> [171] "FT02579_pos" "FT08242_pos" "FT08417_pos" "FT09562_pos" "FT10722_neg"
#> [176] "FT10852_pos" "FT11606_pos" "FT14519_pos"
# Show the multiclass metrics stored with the Random Forest selection result.
print(sel_rf_comp_list[["_multiclass_metrics_"]])
#> $accuracy
#> [1] 1
#> 
#> $balanced.accuracy
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $DOR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>             Inf             Inf             Inf             Inf             Inf 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>             Inf             Inf             Inf             Inf             Inf 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>             Inf             Inf             Inf             Inf 
#> 
#> $error.rate
#> [1] 0
#> 
#> $F0.5
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $F1
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $F2
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $FDR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $FNR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $FOR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $FPR
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $geometric.mean
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $Jaccard
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $L
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>             Inf             Inf             Inf             Inf             Inf 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>             Inf             Inf             Inf             Inf             Inf 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>             Inf             Inf             Inf             Inf 
#> 
#> $lambda
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               0               0               0               0               0 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               0               0               0               0               0 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               0               0               0               0 
#> 
#> $MCC
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $MK
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $NPV
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $OP
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $precision
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $recall
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $specificity
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1 
#> 
#> $Youden
#>      A.gracilis       A.hyalina      M.fragrans   R.beyrichiana       R.bifurca 
#>               1               1               1               1               1 
#>  R.canaliculata     R.cavernosa     R.ciliifera       R.gothica    R.gougetiana 
#>               1               1               1               1               1 
#> R.hemisphaerica  R.huebeneriana     R.sorocarpa    R.subbifurca 
#>               1               1               1               1
print(sel_rf_comp_list$`_model_r2_`)
#> Rsquared 
#>        1

Draw heatmap.

# Heatmap of the molecule-level features, restricted to the variables that the
# Random Forest model selected.
heatmap.selected_features(
  feat_list = marchantiales$comp_list,
  sel_feat = sel_rf_comp_list$`_selected_variables_`,
  sample_colors = marchantiales$metadata$color,
  plot_width = 8,
  plot_height = 6,
  cex_col = 0.1,
  cex_row = 0.4,
  filename = NULL,
  main = "Random Forest"
)

Draw interactive heatmap. For the remaining abstraction levels, only the interactive heatmap is plotted.

library(heatmaply)

# Interactive heatmap of the column-scaled molecule-level features selected by
# Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag ("return an alpha channel"), not an
#   opacity value; the previous alpha=0.1 was coerced to TRUE, so TRUE is
#   written out explicitly and the resulting palette is unchanged.
heatmaply(
  scale(
    marchantiales$comp_list[
      , colnames(marchantiales$comp_list) %in%
        sel_rf_comp_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Estimating EMVs at level of classes

Use Random Forest to select essential variables.

# Random Forest variable selection on the class-level matrix, using species as
# the grouping factor; warnings raised during the call are suppressed.
sel_rf_class_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$class_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)

Print selected variables and R-squared.

print(paste("Number of essential variables:", length(unique(unlist(sel_rf_class_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 113"
print(sel_rf_class_list$`_selected_variables_`)
#>   [1] "Diarylthioethers"                               
#>   [2] "Disaccharides"                                  
#>   [3] "Diterpene.glycosides"                           
#>   [4] "Guanidines"                                     
#>   [5] "Hydrolyzable.tannins"                           
#>   [6] "Quinoline.carboxylic.acids"                     
#>   [7] "Tannins"                                        
#>   [8] "Tyrosols"                                       
#>   [9] "X1.1Z.alkenyl.2.acylglycerophosphoethanolamines"
#>  [10] "X5.deoxyribonucleosides"                        
#>  [11] "Anthracenecarboxylic.acids"                     
#>  [12] "Aromatic.monoterpenoids"                        
#>  [13] "Aryl.fluorides"                                 
#>  [14] "Benzamides"                                     
#>  [15] "Dithioles"                                      
#>  [16] "Glycerophospholipids"                           
#>  [17] "Nitrobenzoic.acids.and.derivatives"             
#>  [18] "Oligosaccharides"                               
#>  [19] "Pyrimidine.ribonucleoside.diphosphates"         
#>  [20] "Triterpenoids"                                  
#>  [21] "Alkyl.aryl.ethers"                              
#>  [22] "Aminopyrimidines.and.derivatives"               
#>  [23] "Cyclic.depsipeptides"                           
#>  [24] "Fluorobenzenes"                                 
#>  [25] "Pteridines.and.derivatives"                     
#>  [26] "Purine.3.deoxyribonucleosides"                  
#>  [27] "Pyrimidine.ribonucleoside.monophosphates"       
#>  [28] "Tetraalkylammonium.salts"                       
#>  [29] "X4.hydroxyflavonoids"                           
#>  [30] "Alkoxy.S.triazines"                             
#>  [31] "Alkylarylthioethers"                            
#>  [32] "Ceramides"                                      
#>  [33] "X5.deoxy.5.thionucleosides"                     
#>  [34] "X7.hydroxysteroids"                             
#>  [35] "Alkyl.phosphates"                               
#>  [36] "Azobenzenes"                                    
#>  [37] "Carbonyl.compounds"                             
#>  [38] "Gamma.keto.acids.and.derivatives"               
#>  [39] "Methoxybenzoic.acids.and.derivatives"           
#>  [40] "Methoxyphenols"                                 
#>  [41] "Organic.disulfides"                             
#>  [42] "Piperidines"                                    
#>  [43] "Pyranoflavonoids"                               
#>  [44] "X6.aminopurines"                                
#>  [45] "Aldehydes"                                      
#>  [46] "Benzenediols"                                   
#>  [47] "Benzo.1.4.dioxanes"                             
#>  [48] "Flavonoid.glycosides"                           
#>  [49] "Long.chain.fatty.acids"                         
#>  [50] "Nitrogen.mustard.compounds"                     
#>  [51] "Organooxygen.compounds"                         
#>  [52] "Phenylsulfates"                                 
#>  [53] "Saxitoxins..gonyautoxins..and.derivatives"      
#>  [54] "X1.hydroxy.2.unsubstituted.benzenoids"          
#>  [55] "Acyclic.olefins"                                
#>  [56] "Boronic.acid.esters"                            
#>  [57] "Chlorohydrins"                                  
#>  [58] "Halopyrimidines"                                
#>  [59] "Medium.chain.keto.acids.and.derivatives"        
#>  [60] "Purine.nucleosides"                             
#>  [61] "Purine.ribonucleoside.monophosphates"           
#>  [62] "Purinones"                                      
#>  [63] "Thiophene.carboxylic.acids.and.derivatives"     
#>  [64] "Alkyl.fluorides"                                
#>  [65] "Fatty.alcohols"                                 
#>  [66] "Peptides"                                       
#>  [67] "Styrenes"                                       
#>  [68] "X2.4.disubstituted.thiazoles"                   
#>  [69] "Benzylamines"                                   
#>  [70] "Flavonoid.O.glycosides"                         
#>  [71] "Isoflavonoid.O.glycosides"                      
#>  [72] "Organic.phosphoric.acids.and.derivatives"       
#>  [73] "Pregnane.type.alkaloids"                        
#>  [74] "Thiazoles"                                      
#>  [75] "X1.benzopyrans"                                 
#>  [76] "Alkaloids.and.derivatives"                      
#>  [77] "Benzenoids"                                     
#>  [78] "Branched.alkanes"                               
#>  [79] "Cardenolides.and.derivatives"                   
#>  [80] "Indoles"                                        
#>  [81] "Ortho.amides"                                   
#>  [82] "Propargyl.type.1.3.dipolar.organic.compounds"   
#>  [83] "Amino.acids.and.derivatives"                    
#>  [84] "Azasteroids.and.derivatives"                    
#>  [85] "Heteroaromatic.compounds"                       
#>  [86] "Hybrid.peptides"                                
#>  [87] "Hydroxy.fatty.acids"                            
#>  [88] "Purines.and.purine.derivatives"                 
#>  [89] "Sugar.alcohols"                                 
#>  [90] "Tetrapyrroles.and.derivatives"                  
#>  [91] "Aminobenzenesulfonamides"                       
#>  [92] "Aromatic.anilides"                              
#>  [93] "Azoles"                                         
#>  [94] "Catechols"                                      
#>  [95] "Lipids.and.lipid.like.molecules"                
#>  [96] "Steroids.and.steroid.derivatives"               
#>  [97] "X3.5.cyclic.purine.nucleotides"                 
#>  [98] "Biphenyls.and.derivatives"                      
#>  [99] "Hydrazines.and.derivatives"                     
#> [100] "Organic.phosphonic.acids"                       
#> [101] "Orthocarboxylic.acid.derivatives"               
#> [102] "Podophyllotoxins"                               
#> [103] "Pyrimidones"                                    
#> [104] "Pyrroloindoles"                                 
#> [105] "Sulfenyl.compounds"                             
#> [106] "X6.O.methylated.flavonoids"                     
#> [107] "Alkyl.sulfates"                                 
#> [108] "Diazines"                                       
#> [109] "Dicarboxylic.acids.and.derivatives"             
#> [110] "Furofuran.lignans"                              
#> [111] "Indoles.and.derivatives"                        
#> [112] "Prenol.lipids"                                  
#> [113] "Primary.amines"
#print(sel_rf_class_list$`_multiclass_metrics_`)
print(sel_rf_class_list$`_model_r2_`)
#> Rsquared 
#>        1

Draw interactive heatmap.

# Interactive heatmap of the column-scaled class-level variables selected by
# Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag, not an opacity value; the previous
#   alpha=0.1 was coerced to TRUE, so TRUE is written out and the palette is
#   unchanged.
heatmaply(
  scale(
    marchantiales$class_list[
      , colnames(marchantiales$class_list) %in%
        sel_rf_class_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Estimating EMVs at level of subclasses

Use Random Forest to select essential variables.

# Random Forest variable selection on the subclass-level matrix, using species
# as the grouping factor; warnings raised during the call are suppressed.
sel_rf_subclass_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$subclass_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)

Print selected variables and R-squared.

print(paste("Number of essential variables:", length(unique(unlist(sel_rf_subclass_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 70"
print(sel_rf_subclass_list$`_selected_variables_`)
#>  [1] "Anthracenes"                                 
#>  [2] "Glycerophospholipids"                        
#>  [3] "Harmala.alkaloids"                           
#>  [4] "Organic.compounds"                           
#>  [5] "Organic.nitrogen.compounds"                  
#>  [6] "Organonitrogen.compounds"                    
#>  [7] "Organooxygen.compounds"                      
#>  [8] "Purine.nucleosides"                          
#>  [9] "Tannins"                                     
#> [10] "X5.deoxyribonucleosides"                     
#> [11] "Dihydrofurans"                               
#> [12] "Lignans..neolignans.and.related.compounds"   
#> [13] "Nucleosides..nucleotides..and.analogues"     
#> [14] "Organic.acids.and.derivatives"               
#> [15] "Phenol.ethers"                               
#> [16] "Pteridines.and.derivatives"                  
#> [17] "Purine.nucleotides"                          
#> [18] "Pyrans"                                      
#> [19] "X5.5.dinucleotides"                          
#> [20] "Benzenoids"                                  
#> [21] "Diarylheptanoids"                            
#> [22] "Diazanaphthalenes"                           
#> [23] "Furans"                                      
#> [24] "Organic.carbonic.acids.and.derivatives"      
#> [25] "Organic.phosphoric.acids.and.derivatives"    
#> [26] "Organophosphorus.compounds"                  
#> [27] "Pyrimidine.nucleosides"                      
#> [28] "Benzopyrans"                                 
#> [29] "Carboxylic.acids.and.derivatives"            
#> [30] "Stilbenes"                                   
#> [31] "Allyl.type.1.3.dipolar.organic.compounds"    
#> [32] "Azobenzenes"                                 
#> [33] "Organic.disulfides"                          
#> [34] "Oxanes"                                      
#> [35] "Piperidines"                                 
#> [36] "Benzodioxanes"                               
#> [37] "Ergoline.and.derivatives"                    
#> [38] "Peptidomimetics"                             
#> [39] "Saxitoxins..gonyautoxins..and.derivatives"   
#> [40] "Sulfonyls"                                   
#> [41] "Alkaloids.and.derivatives"                   
#> [42] "Boronic.acid.derivatives"                    
#> [43] "Diazines"                                    
#> [44] "Glycerolipids"                               
#> [45] "Halohydrins"                                 
#> [46] "Keto.acids.and.derivatives"                  
#> [47] "Organic.oxygen.compounds"                    
#> [48] "Benzene.and.substituted.derivatives"         
#> [49] "Prenol.lipids"                               
#> [50] "Tetracyclines"                               
#> [51] "Aryl.halides"                                
#> [52] "Tetrapyrroles.and.derivatives"               
#> [53] "Biotin.and.derivatives"                      
#> [54] "Lupin.alkaloids"                             
#> [55] "Organoheterocyclic.compounds"                
#> [56] "Oxacyclic.compounds"                         
#> [57] "Propargyl.type.1.3.dipolar.organic.compounds"
#> [58] "Saturated.hydrocarbons"                      
#> [59] "Heteroaromatic.compounds"                    
#> [60] "Steroids.and.steroid.derivatives"            
#> [61] "Triazines"                                   
#> [62] "Fatty.Acyls"                                 
#> [63] "Dioxaborolanes"                              
#> [64] "Flavonoids"                                  
#> [65] "Organic.phosphonic.acids.and.derivatives"    
#> [66] "Orthocarboxylic.acid.derivatives"            
#> [67] "Pyrroles"                                    
#> [68] "Furanoid.lignans"                            
#> [69] "Indoles.and.derivatives"                     
#> [70] "Organohalogen.compounds"
#print(sel_rf_subclass_list$`_multiclass_metrics_`)
print(sel_rf_subclass_list$`_model_r2_`)
#>  Rsquared 
#> 0.9643195

Draw interactive heatmap.

# Interactive heatmap of the column-scaled subclass-level variables selected by
# Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag, not an opacity value; the previous
#   alpha=0.1 was coerced to TRUE, so TRUE is written out and the palette is
#   unchanged.
heatmaply(
  scale(
    marchantiales$subclass_list[
      , colnames(marchantiales$subclass_list) %in%
        sel_rf_subclass_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Estimating EMVs at level of superclasses

Use Random Forest to select essential variables.

# Random Forest variable selection on the superclass-level matrix, using
# species as the grouping factor; warnings raised during the call are
# suppressed.
sel_rf_superclass_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$superclass_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)

Print selected variables and R-squared.

print(paste("Number of essential variables:", length(unique(unlist(sel_rf_superclass_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 16"
print(sel_rf_superclass_list$`_selected_variables_`)
#>  [1] "Lignans..neolignans.and.related.compounds"
#>  [2] "Lipids.and.lipid.like.molecules"          
#>  [3] "Nucleosides..nucleotides..and.analogues"  
#>  [4] "Organic.acids.and.derivatives"            
#>  [5] "Organic.compounds"                        
#>  [6] "Organic.nitrogen.compounds"               
#>  [7] "Organic.oxygen.compounds"                 
#>  [8] "Organoheterocyclic.compounds"             
#>  [9] "Organosulfur.compounds"                   
#> [10] "Phenylpropanoids.and.polyketides"         
#> [11] "Benzenoids"                               
#> [12] "Organophosphorus.compounds"               
#> [13] "Hydrocarbons"                             
#> [14] "Organic.1.3.dipolar.compounds"            
#> [15] "Organohalogen.compounds"                  
#> [16] "Alkaloids.and.derivatives"
#print(sel_rf_superclass_list$`_multiclass_metrics_`)
print(sel_rf_superclass_list$`_model_r2_`)
#>  Rsquared 
#> 0.7605452

Draw interactive heatmap.

# Interactive heatmap of the column-scaled superclass-level variables selected
# by Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag, not an opacity value; the previous
#   alpha=0.1 was coerced to TRUE, so TRUE is written out and the palette is
#   unchanged.
heatmaply(
  scale(
    marchantiales$superclass_list[
      , colnames(marchantiales$superclass_list) %in%
        sel_rf_superclass_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Estimating EMVs at level of Natural Product Classes

Use Random Forest to select essential variables.

# Random Forest variable selection on the Natural Product class matrix, using
# species as the grouping factor; warnings raised during the call are
# suppressed.
sel_rf_npclass_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$npclass_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)

Print selected variables and R-squared.

print(paste("Number of essential variables:", length(unique(unlist(sel_rf_npclass_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 84"
print(sel_rf_npclass_list$`_selected_variables_`)
#>  [1] "Ascarosides"                                   
#>  [2] "Ceramides"                                     
#>  [3] "Disaccharides"                                 
#>  [4] "Flavones"                                      
#>  [5] "Gallotannins"                                  
#>  [6] "Phenazine.alkaloids"                           
#>  [7] "Phenylethanoids"                               
#>  [8] "Polyamines"                                    
#>  [9] "Polysaccharides"                               
#> [10] "Tropane.alkaloids"                             
#> [11] "Aminosugars"                                   
#> [12] "Anthraquinones.and.anthrones"                  
#> [13] "Cyclic.peptides"                               
#> [14] "Glycerophosphoinositols"                       
#> [15] "Norlabdane.diterpenoids"                       
#> [16] "pteridine.alkaloids"                           
#> [17] "Pulvinones"                                    
#> [18] "Secoiridoid.monoterpenoids"                    
#> [19] "Triacylglycerols"                              
#> [20] "Aminoglycosides"                               
#> [21] "Glycosylmonoacylglycerols"                     
#> [22] "Irregular.monoterpenoids"                      
#> [23] "Kaurane.and.Phyllocladane.diterpenoids"        
#> [24] "Linear.peptides"                               
#> [25] "Open.chain.polyketides"                        
#> [26] "Purine.alkaloids"                              
#> [27] "Pyrrole.alkaloids"                             
#> [28] "Saxitoxins"                                    
#> [29] "Oxidized.glycerophospholipids"                 
#> [30] "Simple.indole.alkaloids"                       
#> [31] "Simple.phenolic.acids"                         
#> [32] "Tripeptides"                                   
#> [33] "X2.pyrone.derivatives"                         
#> [34] "Amino.fatty.acids"                             
#> [35] "Betalain.alkaloids"                            
#> [36] "Flavanones"                                    
#> [37] "Polyene.macrolides"                            
#> [38] "Yohimbine.like.alkaloids"                      
#> [39] "Isoquinoline.alkaloids"                        
#> [40] "Lactones"                                      
#> [41] "Megastigmanes"                                 
#> [42] "Phoslactomycins.or.Phosphazomycins"            
#> [43] "Proanthocyanins"                               
#> [44] "Purine.nucleostides"                           
#> [45] "Unsaturated.fatty.acids"                       
#> [46] "Cephalosporins"                                
#> [47] "Fatty.alcohols"                                
#> [48] "Fatty.aldehydes"                               
#> [49] "Monosaccharides"                               
#> [50] "Oligomycins"                                   
#> [51] "X3.Spirotetramic.acids"                        
#> [52] "Carboline.alkaloids"                           
#> [53] "Cinnamic.acids.and.derivatives"                
#> [54] "Limonoids"                                     
#> [55] "Tetracyclines"                                 
#> [56] "Abietane.diterpenoids"                         
#> [57] "Cardenolides"                                  
#> [58] "Coumaronochromones"                            
#> [59] "Daucane.sesquiterpenoids"                      
#> [60] "Flavonols"                                     
#> [61] "Isoflavones"                                   
#> [62] "Phenoxazine.alkaloids"                         
#> [63] "RiPPs.Cyanobactins"                            
#> [64] "Apocarotenoids.β."                             
#> [65] "Flavan.3.ols"                                  
#> [66] "Indole.diketopiperazine.alkaloids.L.Trp..L.Ala"
#> [67] "Isoindole.alkaloids"                           
#> [68] "Phenylethylamines"                             
#> [69] "Simple.aromatic.polyketides"                   
#> [70] "Cholestane.steroids"                           
#> [71] "Other.Octadecanoids"                           
#> [72] "Depsides"                                      
#> [73] "Dipeptides"                                    
#> [74] "Ergostane.steroids"                            
#> [75] "N.acyl.amines"                                 
#> [76] "Pinane.monoterpenoids"                         
#> [77] "Amino.cyclitols"                               
#> [78] "Depsipeptides"                                 
#> [79] "Fatty.acyl.carnitines"                         
#> [80] "Naphthoquinones"                               
#> [81] "Boromycins"                                    
#> [82] "Furofuranoid.lignans"                          
#> [83] "Hydrocarbons"                                  
#> [84] "Oligomeric.stibenes"
#print(sel_rf_npclass_list$`_multiclass_metrics_`)
print(sel_rf_npclass_list$`_model_r2_`)
#> Rsquared 
#>  0.97691

Draw interactive heatmap.

# Interactive heatmap of the column-scaled Natural Product class variables
# selected by Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag, not an opacity value; the previous
#   alpha=0.1 was coerced to TRUE, so TRUE is written out and the palette is
#   unchanged.
heatmaply(
  scale(
    marchantiales$npclass_list[
      , colnames(marchantiales$npclass_list) %in%
        sel_rf_npclass_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Estimating EMVs at level of Natural Product Pathways

Use Random Forest to select essential variables.

# Random Forest variable selection on the Natural Product pathway matrix, using
# species as the grouping factor; warnings raised during the call are
# suppressed.
sel_rf_nppathway_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$nppathway_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)
#> note: only 6 unique complexity parameters in default grid. Truncating the grid to 6 .

Print selected variables and R-squared.

print(paste("Number of essential variables:", length(unique(unlist(sel_rf_nppathway_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 7"
print(sel_rf_nppathway_list$`_selected_variables_`)
#> [1] "Alkaloids"                       "Amino.acids.and.Peptides"       
#> [3] "Carbohydrates"                   "Fatty.acids"                    
#> [5] "Polyketides"                     "Shikimates.and.Phenylpropanoids"
#> [7] "Terpenoids"
#print(sel_rf_nppathway_list$`_multiclass_metrics_`)
print(sel_rf_nppathway_list$`_model_r2_`)
#>  Rsquared 
#> 0.2850012

Draw interactive heatmap.

# Interactive heatmap of the column-scaled Natural Product pathway variables
# selected by Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag, not an opacity value; the previous
#   alpha=0.1 was coerced to TRUE, so TRUE is written out and the palette is
#   unchanged.
heatmaply(
  scale(
    marchantiales$nppathway_list[
      , colnames(marchantiales$nppathway_list) %in%
        sel_rf_nppathway_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Estimating EMVs at level of molecular descriptors

Use Random Forest to select essential variables.

# Random Forest variable selection on the molecular descriptor matrix, using
# species as the grouping factor; warnings raised during the call are
# suppressed.
sel_rf_mdes_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$mdes_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)

Print selected variables and R-squared.

print(paste("Number of essential variables:", length(unique(unlist(sel_rf_mdes_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 45"
print(sel_rf_mdes_list$`_selected_variables_`)
#>  [1] "C2SP1"     "khs.ddsN"  "khs.dNH"   "khs.dssS"  "khs.sNH3"  "khs.sssN" 
#>  [7] "khs.tsC"   "MDEC.44"   "nC"        "SCH.4"     "khs.ddC"   "khs.sPH2" 
#> [13] "khs.sSH"   "khs.sssNH" "nD"        "VCH.4"     "khs.tCH"   "nR"       
#> [19] "XLogP"     "khs.ssS"   "nRings4"   "khs.aaO"   "khs.dCH2"  "khs.ssssB"
#> [25] "VCH.3"     "ALogP"     "khs.sCl"   "khs.sF"    "khs.ssNH2" "ATSc3"    
#> [31] "khs.aaaC"  "khs.sBr"   "khs.ssssN" "nAtomP"    "nM"        "C1SP1"    
#> [37] "khs.dS"    "MDEO.22"   "nAcid"     "khs.sssB"  "MDEN.11"   "nN"       
#> [43] "nRings7"   "khs.aaNH"  "khs.tN"
#print(sel_rf_mdes_list$`_multiclass_metrics_`)
print(sel_rf_mdes_list$`_model_r2_`)
#>  Rsquared 
#> 0.5718747

Draw interactive heatmap.

# Interactive heatmap of the column-scaled molecular descriptors selected by
# Random Forest.
# - drop = FALSE keeps the subset two-dimensional (and keeps its column name)
#   when only a single variable was selected.
# - colorRamp()'s `alpha` is a logical flag, not an opacity value; the previous
#   alpha=0.1 was coerced to TRUE, so TRUE is written out and the palette is
#   unchanged.
heatmaply(
  scale(
    marchantiales$mdes_list[
      , colnames(marchantiales$mdes_list) %in%
        sel_rf_mdes_list$`_selected_variables_`,
      drop = FALSE
    ]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha = TRUE, bias = 1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)

Close parallel processing

Once all analyses have finished, shut down parallel processing.

# Switch foreach back to its sequential backend before unloading doMC:
# detaching the package on its own leaves the multicore backend registered
# with foreach, so later %dopar% calls would still try to use it.
foreach::registerDoSEQ()
detach("package:doMC", unload=TRUE)

Export peak tables for MetaboLights

Annotate peak tables in negative ion mode.

# Three-step MAF export for negative ion mode: write the combined table, then
# run it through the class and compound annotation helpers. Each step reads
# the file written by the previous one.
f.export_maf(
  cbind(ms1_def_neg, t(feat_list_neg)),
  "data/metabolites_maf_neg.tsv"
)
f.annotate_maf_classes(
  maf_input = "data/metabolites_maf_neg.tsv",
  maf_output = "data/metabolites_maf_neg_classes.tsv"
)
f.annotate_maf_compounds(
  maf_input = "data/metabolites_maf_neg_classes.tsv",
  maf_output = "data/m_MTBLS2239_LC-MS_negative_reverse-phase_metabolite_profiling_v2_maf.tsv",
  polarity = "neg",
  xcms_id = rownames(ms1_def_neg),
  pol_mode = rep("neg", nrow(ms1_def_neg)),
  smiles = ms1_def_neg$smiles,
  names = ms1_def_neg$name
)

Annotate peak tables in positive ion mode.

# Three-step MAF export for positive ion mode: write the combined table, then
# run it through the class and compound annotation helpers. Each step reads
# the file written by the previous one.
f.export_maf(
  cbind(ms1_def_pos, t(feat_list_pos)),
  "data/metabolites_maf_pos.tsv"
)
f.annotate_maf_classes(
  maf_input = "data/metabolites_maf_pos.tsv",
  maf_output = "data/metabolites_maf_pos_classes.tsv"
)
f.annotate_maf_compounds(
  maf_input = "data/metabolites_maf_pos_classes.tsv",
  maf_output = "data/m_MTBLS2239_LC-MS_positive_reverse-phase_metabolite_profiling_v2_maf.tsv",
  polarity = "pos",
  xcms_id = rownames(ms1_def_pos),
  pol_mode = rep("pos", nrow(ms1_def_pos)),
  smiles = ms1_def_pos$smiles,
  names = ms1_def_pos$name
)